!pip install -U kaleido # kaleido required for saving the plotly figures into static images
Requirement already satisfied: kaleido in /usr/local/lib/python3.7/dist-packages (0.2.1)
!pip install geopandas --quiet
!pip install geoplot --quiet
!pip install watermark
%load_ext watermark
%watermark -d -m -v -p numpy,matplotlib,sklearn,pandas
Requirement already satisfied: watermark in /usr/local/lib/python3.7/dist-packages (2.3.0) Requirement already satisfied: ipython in /usr/local/lib/python3.7/dist-packages (from watermark) (5.5.0) Requirement already satisfied: importlib-metadata<3.0 in /usr/local/lib/python3.7/dist-packages (from watermark) (2.1.3) Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata<3.0->watermark) (3.8.0) Requirement already satisfied: pexpect in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (4.8.0) Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (5.1.1) Requirement already satisfied: simplegeneric>0.8 in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (0.8.1) Requirement already satisfied: pickleshare in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (0.7.5) Requirement already satisfied: decorator in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (4.4.2) Requirement already satisfied: prompt-toolkit<2.0.0,>=1.0.4 in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (1.0.18) Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (57.4.0) Requirement already satisfied: pygments in /usr/local/lib/python3.7/dist-packages (from ipython->watermark) (2.6.1) Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.7/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->watermark) (1.15.0) Requirement already satisfied: wcwidth in /usr/local/lib/python3.7/dist-packages (from prompt-toolkit<2.0.0,>=1.0.4->ipython->watermark) (0.2.5) Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.7/dist-packages (from pexpect->ipython->watermark) (0.7.0) The watermark extension is already loaded. To reload it, use: %reload_ext watermark Python implementation: CPython Python version : 3.7.13 IPython version : 5.5.0 numpy : 1.21.6 matplotlib: 3.2.2 sklearn : 0.0 pandas : 1.3.5 Compiler : GCC 7.5.0 OS : Linux Release : 5.4.188+ Machine : x86_64 Processor : x86_64 CPU cores : 2 Architecture: 64bit
#@title ## Base imports
import os
import sys
import pandas as pd
import plotly.express as px
#@title ## Option 1) Mount google drive and import my code
mountpoint_folder_name = "gdrive" # can be anything, doesn't have to be "drive"
project_path_within_drive = "PythonProjects/GeospatialAnalysis" #@param {type:"string"}
project_path_full = os.path.join("/content/",mountpoint_folder_name,
"MyDrive",project_path_within_drive)
try:
import google.colab.drive
import os, sys
# Need to move out of google drive directory if going to remount
%cd
# drive.mount documentation can be accessed via: drive.mount?
#Signature: drive.mount(mountpoint, force_remount=False, timeout_ms=120000, use_metadata_server=False)
google.colab.drive.mount(os.path.join("/content/",mountpoint_folder_name), force_remount=True) # mounts to a folder called mountpoint_folder_name
if project_path_full not in sys.path:
pass
#sys.path.insert(0,project_path_full)
%cd {project_path_full}
except ModuleNotFoundError: # in case not run in Google colab
import traceback
traceback.print_exc()
/root Mounted at /content/gdrive /content/gdrive/MyDrive/Computer Backups/Rahul Yerrabelli drive/PythonProjects/GeospatialAnalysis
#@title ## Option 2) Clone project files from Github
!git clone https://github.com/ryerrabelli/GeospatialAnalysis.git
project_path_full = os.path.join("/content/","GeospatialAnalysis")
sys.path.insert(1,project_path_full)
%cd GeospatialAnalysis
print(sys.path)
Cloning into 'GeospatialAnalysis'... remote: Enumerating objects: 58, done. remote: Counting objects: 100% (58/58), done. remote: Compressing objects: 100% (43/43), done. remote: Total 58 (delta 15), reused 54 (delta 11), pack-reused 0 Unpacking objects: 100% (58/58), done. ['', '/content/GeospatialAnalysis', '/content', '/env/python', '/usr/lib/python37.zip', '/usr/lib/python3.7', '/usr/lib/python3.7/lib-dynload', '/usr/local/lib/python3.7/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.7/dist-packages/IPython/extensions', '/root/.ipython']
image_folder_path = "outputs"
if not os.path.exists(image_folder_path):
os.mkdir(image_folder_path)
def save_figure(fig, file_name:str, animated=False):
"""
fig is of type plotly.graph_objs._figure.Figure,
Requires kaleido to be installed
"""
fig.write_html(os.path.join(image_folder_path, file_name+".html"))
if not animated:
fig.write_image(os.path.join(image_folder_path, file_name+".svg"))
fig.write_image(os.path.join(image_folder_path, file_name+".png"))
fig.write_image(os.path.join(image_folder_path, file_name+".jpg"))
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
counties = json.load(response)
To understand what is meant by long type and wife type dataframes, see https://towardsdatascience.com/visualization-with-plotly-express-comprehensive-guide-eb5ee4b50b57
import pandas as pd
fips2county = pd.read_csv("data/fips2county.tsv", sep="\t", comment='#', dtype=str)
# The ent CSV file only contains the counties which are analyzable
df_orig = pd.read_csv("data/2022_04_10 ent initial output.csv", dtype={"FIPS": str})
# Merge with the fips 2 county standard data set
df_wide = pd.merge(left=df_orig, right=fips2county, how="left", left_on='FIPS', right_on='CountyFIPS')
# Insert a county "County, ST" col (i.e. "Freehold, NJ" or "Chicago, IL") for ease
df_wide.insert(1, "County_St", df_wide["CountyName"].astype(str) + ", " + df_wide["StateAbbr"].astype(str))
# Display with all the columns
with pd.option_context('display.max_rows', 3, 'display.max_columns', None):
display(df_wide)
pass
loc_main = ["FIPS", "County", "StateFIPS", "Total Medicare Payment Amount: 2019", "CountyFIPS_3", "CountyName", "StateName", "CountyFIPS", "StateAbbr", "STATE_COUNTY"]
#a=pd.merge(right=df_orig, left=fips2county, how="outer", right_on='FIPS', left_on='CountyFIPS')
#a=a.loc[:,loc_main]
#df_orig2=df_orig.loc[:,["FIPS","pop","Moran I score for ACS billing fraction","County"]]
| FIPS | County_St | Total Number of Services | Total Medicare Payment Amount | Total Number of Services: 2019 | Total Medicare Payment Amount: 2019 | Total Number of Services: 2018 | Total Medicare Payment Amount: 2018 | Total Number of Services: 2017 | Total Medicare Payment Amount: 2017 | Total Number of Services: 2016 | Total Medicare Payment Amount: 2016 | Total Number of Services: 2015 | Total Medicare Payment Amount: 2015 | tot_ratio | % ASC Procedures: 2019 | % ASC Billing: 2019 | % ASC Procedures: 2018 | % ASC Billing: 2018 | % ASC Procedures: 2017 | % ASC Billing: 2017 | % ASC Procedures: 2016 | % ASC Billing: 2016 | % ASC Procedures: 2015 | % ASC Billing: 2015 | % ASC Procedures | % ASC Billing | Beneficiaries with Part A and Part B | Average Age | Percent Male | Percent Non-Hispanic White | Percent African American | Percent Hispanic | Percent Eligible for Medicaid | Average HCC Score | Hospital Readmission Rate | Emergency Department Visits per 1000 Beneficiaries | Procedures Per Capita Standardized Costs | Procedure Events Per 1000 Beneficiaries | metro | pct_poverty | median_house_income | pop | 2013_Rural_urban_cont_code | Pct_wthout_high_diploma | Pct_wth_high_diploma | Pct_wth_some_coll | Pct_wth_coll_degree | unemployment | pct_uninsured | fibro | tabacco | obesity | migrane | Alzheimers | Depression | Alcohol Abuse | Drug Abuse | Schizo_othr_psych | COPD | Chronic Kidney Disease | Osteoporosis | Stroke | Diabetes | Asthma | Arthritis | Hypertension | Heart Failure | Ischemic Heart Disease | Population Density | Medicare Population Density | Moran I score for ACS billing fraction | County | StateFIPS | CountyFIPS_3 | CountyName | StateName | CountyFIPS | StateAbbr | STATE_COUNTY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01017 | Chambers, AL | 408.0 | 30064.800000 | 157.0 | 10363.09 | 115.0 | 6360.510000 | 136.0 | 13341.20 | 0.0 | 0.00 | 0.0 | 0.00 | 14.196990 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 8489.0 | 69.2 | 44.126 | 64.466 | 34.326 | 0.286 | 28.144 | 1.04 | 16.296 | 726.4 | 502.664 | 3765.2 | 0 | 19.76 | 38781.0 | 33660.4 | 6.0 | 18.4 | 36.7 | 31.6 | 13.3 | 4.52 | 11.94 | 23.6 | 11.0 | 22.2 | 2.8 | 11.06 | 13.92 | 2.125 | 3.5 | 3.30 | 14.46 | 25.76 | 4.40 | 3.54 | 34.70 | 4.10 | 35.56 | 67.0 | 16.78 | 31.58 | 56.426908 | 14.230610 | Non Significant | Chambers | 01 | 017 | Chambers | Alabama | 01017 | AL | AL | CHAMBERS |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 940 | 51041 | Chesterfield, VA | 617.0 | 204885.309999 | 111.0 | 47135.49 | 167.0 | 54715.569999 | 187.0 | 53982.17 | 78.0 | 24873.24 | 74.0 | 24178.84 | 34.339788 | 28.828829 | 51.859904 | 47.305389 | 61.493685 | 41.176471 | 61.207895 | 15.384615 | 20.982791 | 0.0 | 0.0 | 32.414911 | 47.027037 | 52776.6 | 72.0 | 44.492 | 79.032 | 15.324 | 1.498 | 8.708 | 0.89 | 16.836 | 597.8 | 663.378 | 5401.4 | 1 | 6.96 | 78863.0 | 343550.6 | 1.0 | 7.7 | 23.6 | 28.3 | 40.5 | 3.42 | 8.98 | 17.6 | 6.8 | 20.0 | 2.8 | 9.76 | 16.92 | 1.550 | 1.6 | 2.82 | 9.02 | 20.84 | 5.48 | 3.50 | 26.98 | 5.34 | 31.40 | 59.0 | 10.50 | 25.18 | 811.606508 | 124.679835 | Non Significant | Chesterfield | 51 | 041 | Chesterfield | Virginia | 51041 | VA | VA | CHESTERFIELD |
941 rows × 80 columns
cols_to_keep = ["FIPS","County_St"]
col_categories = ["Total Number of Services:", "Total Medicare Payment Amount:", "% ASC Procedures:", "% ASC Billing:"]
df_longs = []
# Convert each type of category to long format in separate dataframes
for col_category in col_categories:
df_long = df_wide.melt(id_vars=cols_to_keep,
var_name="Year",
value_vars=[f"{col_category} {year}" for year in range(2015, 2019 +1)],
value_name=f"{col_category} in Year",
)
df_long["Year"] = df_long["Year"].replace({ f"{col_category} {year}":f"{year}" for year in range(2015, 2019 +1)})
df_longs.append(df_long)
# Merge the separate category dataframes
df_long = df_longs[0]
for ind in range(1,len(df_longs)):
df_long = pd.merge(left=df_long, right=df_longs[ind], how="outer", on=(cols_to_keep+["Year"]) )
# Merge with the overall wide dataframe to keep those other values
df_long = pd.merge(left=df_long,
right=df_wide.drop([f"{col_category} {year}" for year in range(2015, 2019 +1) for col_category in col_categories], axis=1),
how="left", on=cols_to_keep)
display(df_long)
| FIPS | County_St | Year | Total Number of Services: in Year | Total Medicare Payment Amount: in Year | % ASC Procedures: in Year | % ASC Billing: in Year | Total Number of Services | Total Medicare Payment Amount | tot_ratio | ... | Medicare Population Density | Moran I score for ACS billing fraction | County | StateFIPS | CountyFIPS_3 | CountyName | StateName | CountyFIPS | StateAbbr | STATE_COUNTY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01017 | Chambers, AL | 2015 | 0.0 | 0.00 | 0.000000 | 0.000000 | 408.0 | 30064.800000 | 14.196990 | ... | 14.230610 | Non Significant | Chambers | 01 | 017 | Chambers | Alabama | 01017 | AL | AL | CHAMBERS |
| 1 | 01033 | Colbert, AL | 2015 | 108.0 | 10404.39 | 0.000000 | 0.000000 | 272.0 | 37080.230000 | 16.000000 | ... | 22.681014 | Non Significant | Colbert | 01 | 033 | Colbert | Alabama | 01033 | AL | AL | COLBERT |
| 2 | 01045 | Dale, AL | 2015 | 0.0 | 0.00 | 0.000000 | 0.000000 | 12.0 | 405.210000 | 0.999104 | ... | 17.700437 | Non Significant | Dale | 01 | 045 | Dale | Alabama | 01045 | AL | AL | DALE |
| 3 | 01083 | Limestone, AL | 2015 | 0.0 | 0.00 | 0.000000 | 0.000000 | 55.0 | 9515.590000 | 4.000000 | ... | 29.157261 | Non Significant | Limestone | 01 | 083 | Limestone | Alabama | 01083 | AL | AL | LIMESTONE |
| 4 | 05145 | White, AR | 2015 | 1217.0 | 48412.57 | 0.000000 | 0.000000 | 1269.0 | 52190.220000 | 11.995594 | ... | 15.224018 | Non Significant | White | 05 | 145 | White | Arkansas | 05145 | AR | AR | WHITE |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4700 | 21073 | Franklin, KY | 2019 | 0.0 | 0.00 | 0.000000 | 0.000000 | 114.0 | 7749.960000 | 3.910144 | ... | 62.858188 | Non Significant | Franklin | 21 | 073 | Franklin | Kentucky | 21073 | KY | KY | FRANKLIN |
| 4701 | 56021 | Laramie, WY | 2019 | 422.0 | 79083.86 | 100.000000 | 100.000000 | 1784.0 | 337949.890001 | 21.000000 | ... | 6.286729 | Non Significant | Laramie | 56 | 021 | Laramie | Wyoming | 56021 | WY | WY | LARAMIE |
| 4702 | 54041 | Lewis, WV | 2019 | 0.0 | 0.00 | 0.000000 | 0.000000 | 606.0 | 26648.230000 | 4.000000 | ... | 10.524948 | Low-Low | Lewis | 54 | 041 | Lewis | West Virginia | 54041 | WV | WV | LEWIS |
| 4703 | 50027 | Windsor, VT | 2019 | 319.0 | 12093.61 | 0.000000 | 0.000000 | 1132.0 | 47825.130000 | 35.000000 | ... | 14.320922 | Low-Low | Windsor | 50 | 027 | Windsor | Vermont | 50027 | VT | VT | WINDSOR |
| 4704 | 51041 | Chesterfield, VA | 2019 | 111.0 | 47135.49 | 28.828829 | 51.859904 | 617.0 | 204885.309999 | 34.339788 | ... | 124.679835 | Non Significant | Chesterfield | 51 | 041 | Chesterfield | Virginia | 51041 | VA | VA | CHESTERFIELD |
4705 rows × 65 columns
Warning: Total number of columns (65) exceeds max_columns (20) limiting to first (20) columns.
fig = px.choropleth(df_wide, geojson=counties, locations='FIPS',
color='% ASC Procedures',
color_continuous_scale="Viridis",
#range_color=(0, 12),
scope="usa",
#facet_col="Moran I score for ACS billing fraction",
labels={
"2013_Rural_urban_cont_code":"2013-RUCA",
"pop":"Pop.",
"Average Age":"Mean Age",
"Percent Male":"% M",
"tot_ratio":"Tot. Ratio",
},
hover_name="County_St",
hover_data={
'% ASC Procedures': ":.0f",
"FIPS":True,
"pop": ":.1f",
"2013_Rural_urban_cont_code":True,
"Average Age": ":.1f",
"Percent Male": ":.1f",
},
)
fig.update_layout(
hoverlabel=dict(
bgcolor="white",
font_size=16,
font_family="Rockwell",
align="auto"
)
)
fig.update_layout(legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1
))
# Define layout specificities
fig.update_layout(
margin={"r":0,"t":0,"l":0,"b":0},
title={
'text': f"% ASC Procedures",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'
}
)
#fig.show()
save_figure(fig,"choropleth-total")
fig = px.choropleth(df_long, geojson=counties, locations='FIPS',
color='% ASC Procedures: in Year',
color_continuous_scale="Viridis",
#range_color=(0, 12),
scope="usa",
#facet_col="Moran I score for ACS billing fraction",
labels={
"2013_Rural_urban_cont_code":"2013-RUCA",
"pop":"Pop.",
"Average Age":"Mean Age",
"Percent Male":"% M",
"tot_ratio":"Tot. Ratio",
},
hover_name="County_St",
hover_data={
"FIPS":True,
"pop": ":.1f",
"2013_Rural_urban_cont_code":True,
"Average Age": ":.1f",
"Percent Male": ":.1f",
},
animation_frame="Year",
)
fig.update_layout(
hoverlabel=dict(
bgcolor="white",
font_size=16,
font_family="Rockwell",
align="auto"
)
)
fig.update_layout(legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1
))
# Define layout specificities
fig.update_layout(
margin={"r":0,"t":0,"l":0,"b":0},
title={
'text': f"% ASC Procedures by Year",
'y':0.95,
'x':0.5,
'xanchor': 'center',
'yanchor': 'top'
}
)
fig.show()
save_figure(fig,"choropleth-all", animated=True)